{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "U90jLcxCCMrN"},
   "outputs": [],
   "source": [
    "# Install the NVIDIA NeMo toolkit and its dependencies, including all optional components.\n",
    "# %pip installs into the environment of the running kernel; the requirement is quoted so the\n",
    "# shell does not interpret the square brackets.\n",
    "%pip install \"nemo_toolkit[all]\"\n",
    "\n",
    "# Import NeMo's core package.\n",
    "import nemo\n",
    "\n",
    "# Import NeMo's ASR collection, which includes complete ASR models and building blocks.\n",
    "import nemo.collections.asr as nemo_asr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "Jdd72LoUH3uk"},
   "outputs": [],
   "source": [
    "# Import the other libraries used below for data manipulation and audio file handling.\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import librosa\n",
    "from scipy.io import wavfile"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {"id": "Iku3WSS2t_-j"},
   "source": ["## Import Data"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {"base_uri": "https://localhost:8080/"},
    "executionInfo": {
     "elapsed": 25498,
     "status": "ok",
     "timestamp": 1677761213523,
     "user": {"displayName": "Camille Landesvatter", "userId": "08465821237712601560"},
     "user_tz": -60
    },
    "id": "A-45t0FTG4d5",
    "outputId": "eeb7b892-232b-4e14-a956-cfc9af81c44e"
   },
   "outputs": [],
   "source": [
    "# Mount the Google Drive that contains the WAV files (Colab only).\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "ZeBfTXaGHcC6"},
   "outputs": [],
   "source": [
    "# Collect the paths of all WAV files with librosa's utility function.\n",
    "# Note: Files with a size less than 100KB (~ <2 seconds) were excluded in a previous step,\n",
    "# as librosa.load cannot resample them to 16000Hz.\n",
    "\n",
    "# Replace the placeholder with the directory that holds the WAV files.\n",
    "# recurse=False restricts the search to that directory itself (no sub-directories).\n",
    "files = librosa.util.find_files('path-containing-wav-files', ext='wav', recurse=False)\n",
    "\n",
    "# Convert the list of file paths to a NumPy array.\n",
    "files = np.asarray(files)\n",
    "\n",
    "# Number of files found.\n",
    "len(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "Q1T5VET9UFdO"},
   "outputs": [],
   "source": [
    "# Print the current sample rate of every WAV file (read via librosa.get_samplerate).\n",
    "\n",
    "for wav_file in files:\n",
    "    sr = librosa.get_samplerate(wav_file)\n",
    "    print(f'{wav_file}: {sr} Hz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "0F-Gnns8ZQsK"},
   "outputs": [],
   "source": [
    "# Resample every file to 16000 Hz and write it back to the same path.\n",
    "# NOTE: this overwrites the original recordings in place - keep a backup if the\n",
    "# source audio is still needed.\n",
    "\n",
    "# Target sample rate in Hz\n",
    "new_sample_rate = 16000\n",
    "\n",
    "# Loop over all files in the list\n",
    "for filepath in files:\n",
    "    # sr=None loads the audio at the file's own sample rate and returns that rate\n",
    "    y, sr = librosa.load(filepath, sr=None)\n",
    "\n",
    "    # Resample the audio to the new sample rate\n",
    "    y_resampled = librosa.resample(y, orig_sr=sr, target_sr=new_sample_rate)\n",
    "\n",
    "    # Write the resampled audio over the original WAV file\n",
    "    wavfile.write(filepath, new_sample_rate, y_resampled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {"base_uri": "https://localhost:8080/"},
    "executionInfo": {
     "elapsed": 1187,
     "status": "ok",
     "timestamp": 1677682988497,
     "user": {"displayName": "Camille Landesvatter", "userId": "08465821237712601560"},
     "user_tz": -60
    },
    "id": "UsYTDmvcr2Y8",
    "outputId": "75189be5-be96-4f9c-b782-b7b6918ac2c1"
   },
   "outputs": [
    {
     "data": {"text/plain": ["2993"]},
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect the paths of the resampled files; from here on 'files' refers to these.\n",
    "\n",
    "files_split1 = librosa.util.find_files('path-containing-resampled-wav-files', ext='wav')\n",
    "files = np.asarray(files_split1)\n",
    "\n",
    "# Number of resampled files found.\n",
    "len(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {"base_uri": "https://localhost:8080/"},
    "executionInfo": {
     "elapsed": 1061,
     "status": "ok",
     "timestamp": 1677682999807,
     "user": {"displayName": "Camille Landesvatter", "userId": "08465821237712601560"},
     "user_tz": -60
    },
    "id": "agAGZTB_r6lJ",
    "outputId": "c57d4d3a-88d6-4689-c13e-15f1e8af876a"
   },
   "outputs": [
    {
     "data": {"text/plain": ["16000"]},
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the sample rate of one resampled file (expected: 16000).\n",
    "librosa.get_samplerate(files[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {"id": "tuvrsbDvuEy6"},
   "source": ["## Import Model"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "b0rcWrLHu_QL"},
   "outputs": [],
   "source": [
    "# Load the pretrained ASR (Automatic Speech Recognition) model used for transcription.\n",
    "asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=\"stt_en_conformer_transducer_xxlarge\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {"id": "iITTpJ-VuHrJ"},
   "source": ["## Transcription"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "5uPFEWodJT-g"},
   "outputs": [],
   "source": [
    "%%capture\n",
    "# \"%%capture\" captures and suppresses the output of this cell. A cell magic only works\n",
    "# when it is the very first line of the cell, so it must stay above these comments.\n",
    "\n",
    "# Raw transcribe() results, one entry per audio file, in the same order as 'files'.\n",
    "transcriptions = []\n",
    "\n",
    "# Transcribe the files one at a time and keep each raw result.\n",
    "for wav_path in files:\n",
    "  result = asr_model.transcribe([wav_path])\n",
    "  transcriptions.append(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {"base_uri": "https://localhost:8080/"},
    "executionInfo": {
     "elapsed": 408,
     "status": "ok",
     "timestamp": 1677685395446,
     "user": {"displayName": "Camille Landesvatter", "userId": "08465821237712601560"},
     "user_tz": -60
    },
    "id": "JhmI1KBa9_EM",
    "outputId": "9f97643a-fd1f-4faa-eafd-c074629ee7ac"
   },
   "outputs": [
    {
     "data": {"text/plain": ["2993"]},
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of transcription results; should equal the number of files.\n",
    "len(transcriptions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "23mqe00ULZet"},
   "outputs": [],
   "source": [
    "def first_transcript(result):\n",
    "  \"\"\"Return the transcribed text of the single file behind one transcribe() result.\n",
    "\n",
    "  NOTE(review): the shape of the result is assumed to depend on the installed NeMo\n",
    "  version - either a (best_hypotheses, all_hypotheses) tuple of lists or a plain list,\n",
    "  whose entries are strings or objects with a .text attribute. Confirm against the\n",
    "  installed version.\n",
    "  \"\"\"\n",
    "  # A tuple carries the best hypotheses in its first element; otherwise use the list as is.\n",
    "  best = result[0] if isinstance(result, tuple) else result\n",
    "  # Each transcribe() call above received exactly one file, so take the first entry.\n",
    "  first = best[0]\n",
    "  # Plain strings have no .text attribute and are returned unchanged.\n",
    "  return getattr(first, \"text\", first)\n",
    "\n",
    "# One plain-text transcription per file, in the same order as 'files'.\n",
    "transcriptions_single = [first_transcript(result) for result in transcriptions]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "8Q0ckh0ywuOM"},
   "outputs": [],
   "source": [
    "# Pair every audio file with its transcribed text in a 'data' DataFrame.\n",
    "# 'data' has to be a DataFrame (not a list) so that a named column can be assigned to it.\n",
    "data = pd.DataFrame({\"file\": files})\n",
    "data[\"transcription\"] = transcriptions_single\n",
    "data.head()\n",
    "\n",
    "# To access the transcribed text for a specific sample, you can use e.g.:\n",
    "# data.loc[0, \"transcription\"]"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": [
    {"file_id": "11HjdygAgw9Z2c5-m6xru2C5CjWbtkMik", "timestamp": 1673620932389}
   ]
  },
  "gpuClass": "standard",
  "kernelspec": {"display_name": "Python 3", "name": "python3"},
  "language_info": {"name": "python"}
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
